2015-06-08

大纲

样本模拟

  • 界限分明的基本分类样本

样本模拟

  • 界限分明的基本分类样本
# Class 1: 1500 points from a wide Gaussian blob centred at (25, 16).
x <- rnorm(1500, mean = 25, sd = 4.5)
y <- rnorm(1500, mean = 16, sd = 4.5)
dfnormal <- data.frame(cc = 1, x = x, y = y)

# Class 2: 400 points spread uniformly over the rectangle [2, 13] x [15, 24].
x <- runif(400, min = 2, max = 13)
y <- runif(400, min = 15, max = 24)
dfnormal <- rbind(dfnormal, data.frame(cc = 2, x = x, y = y))

# Class 3: 200 points from a tight Gaussian blob centred at (8, 8).
x <- rnorm(200, mean = 8, sd = 1.5)
y <- rnorm(200, mean = 8, sd = 1.5)
dfnormal <- rbind(dfnormal, data.frame(cc = 3, x = x, y = y))

# Scatter plot coloured by the true class label `cc`; the formula form is
# equivalent to plot(dfnormal$x, dfnormal$y, col = dfnormal$cc).
plot(y ~ x, data = dfnormal, col = cc)

样本模拟

  • 不规则的甜甜圈样本

样本模拟

  • 不规则的甜甜圈样本
# Simulate one noisy ring ("donut") of points that all share a class label.
#
# Arguments:
#   np   number of points whose x is drawn uniformly from [-r, r]
#   np2  number of extra points drawn in EACH of the two outermost unit-wide
#        strips, [-r, -r + 1] and [r - 1, r] (presumably to densify the
#        left/right edges of the ring, where the circle is steep)
#   cc   class label written into every row
#   r    radius of the underlying circle
#   nsd  standard deviation of the Gaussian noise added to both coordinates
#
# Returns a data.frame with columns cc, x, y and np + 2 * np2 rows.
# Note that the noise has mean 2, so the ring ends up centred near (2, 2).
# Example arguments from the original notes: np=800; np2=100; r=25; nsd=2
getcircle <- function(np, np2, cc, r, nsd) {
  n_total <- np + np2 * 2

  # x-coordinates on the exact circle: the bulk plus the two edge strips.
  circle_x <- c(
    runif(np, min = -r, max = r),
    runif(np2, min = -r, max = -r + 1),
    runif(np2, min = r - 1, max = r)
  )

  # A random sign picks the upper or lower half of the circle for each point.
  half <- sign(rnorm(n_total))
  noise_y <- rnorm(n_total, mean = 2, sd = nsd)
  noise_x <- rnorm(n_total, mean = 2, sd = nsd)

  data.frame(
    cc = cc,
    x = circle_x + noise_x,
    y = half * sqrt(r^2 - circle_x^2) + noise_y
  )
}
# Stack three noisy rings of decreasing radius (25, 13, 3), labelled as
# classes 1, 2 and 3 respectively.
dfcircle <- getcircle(np = 800, np2 = 100, cc = 1, r = 25, nsd = 2)
dfcircle <- rbind(
  dfcircle,
  getcircle(np = 300, np2 = 50, cc = 2, r = 13, nsd = 1.6)
)
dfcircle <- rbind(
  dfcircle,
  getcircle(np = 50, np2 = 10, cc = 3, r = 3, nsd = 1)
)

# Scatter plot coloured by the true class; equivalent to
# plot(dfcircle$x, dfcircle$y, col = factor(dfcircle$cc)).
plot(y ~ x, data = dfcircle, col = factor(cc))

kmeans聚类

  • 规则样本
# Drop the original class labels so that only the coordinates are clustered.
mdata <- dfnormal[setdiff(names(dfnormal), "cc")]

# Run k-means asking for 5 clusters.
cc <- kmeans(mdata, centers = 5)
ccout <- fitted(cc)

# Inspect the result: true class (rows) against assigned cluster (columns).
# The lines below are the empty placeholder kept from the original notes
# (it shows three columns although five clusters are requested above).
table(dfnormal$cc, cc$cluster)
#  1  2  3  
#1  
#2  
#3  

# Colour each point by the row name of its fitted row in `ccout`.
plot(y ~ x, data = dfnormal, col = rownames(ccout))

kmeans聚类

  • 规则样本

kmeans聚类

  • 圈圈样本
# Drop the original class labels so that only the coordinates are clustered.
mdata <- dfcircle
mdata$cc <- NULL

# Run k-means asking for 3 clusters.
cc <- kmeans(mdata, 3)
ccout <- fitted(cc)

# Inspect the result: true class (rows) against assigned cluster (columns).
# The label column dfcircle$cc must be passed here, not the whole data frame:
# table() requires all its arguments to have the same length, and a data
# frame's length is its number of columns, so table(dfcircle, cc$cluster)
# stops with "all arguments must have the same length".
# The lines below are the empty placeholder kept from the original notes.
table(dfcircle$cc, cc$cluster)
#  1  2  3  
#1  
#2  
#3  

# Colour each point by the row name of its fitted row in `ccout`.
plot(y ~ x, col = rownames(ccout), dfcircle)

kmeans聚类

  • 圈圈样本

层次聚类

  • 规则样本
# Drop the original class labels so that only the coordinates are clustered.
mdata <- dfnormal[setdiff(names(dfnormal), "cc")]

# Run hierarchical clustering on the pairwise distances; "ave" is an
# abbreviation that hclust() matches to its "average" linkage method.
hc <- hclust(dist(mdata), method = "ave")
plot(hc)

# Cut the tree into 5 groups and colour the scatter plot by group.
ccout <- cutree(hc, k = 5)
plot(y ~ x, data = dfnormal, col = ccout)

层次聚类

  • 规则样本

层次聚类

  • 规则样本(聚类树)
# Draw the cluster tree (dendrogram) for the hclust result `hc`.
plot(hc)

层次聚类

  • 规则样本(字符串结构的聚类树)
  • dendrogram 更多功能
# Convert the hclust result to a dendrogram object and print its structure
# as text, limited to the top two levels of the tree.
dend1 <- as.dendrogram(hc)
str(dend1, max.level = 2)
# Sample output recorded in the original notes:
## --[dendrogram w/ 2 branches and 2100 members at h = 18.6]
##   |--[dendrogram w/ 2 branches and 609 members at h = 11.9]
##   |  |--[dendrogram w/ 2 branches and 200 members at h = 3.88] ..
##   |  `--[dendrogram w/ 2 branches and 409 members at h = 6.24] ..
##   `--[dendrogram w/ 2 branches and 1491 members at h = 16.6]
##      |--leaf 265 
##      `--[dendrogram w/ 2 branches and 1490 members at h = 12.4] ..

层次聚类

  • 圈圈样本
# Drop the original class labels so that only the coordinates are clustered.
mdata <- dfcircle[setdiff(names(dfcircle), "cc")]

# Run hierarchical clustering on the pairwise distances; "ave" is an
# abbreviation that hclust() matches to its "average" linkage method.
hc <- hclust(dist(mdata), method = "ave")
plot(hc)

# Cut the tree into 3 groups and colour the scatter plot by group.
ccout <- cutree(hc, k = 3)
plot(y ~ x, data = dfcircle, col = ccout)

层次聚类

  • 圈圈样本

层次聚类

  • 圈圈样本(神奇的圈圈)
# Re-cluster the same coordinates (`mdata`) with single linkage instead of
# average linkage.
hc <- hclust(dist(mdata), method = "single")

其他

参考